require(tidyverse)
require(lmtest)
require(data.table)
require(plm)
require(stargazer)
require(lubridate)
require(ivreg)
require(DescTools)
require(rsq)
require(miceadds)
require(jtools)
require(stats)
require(haven)
require(sandwich)
require(fastDummies)
require(sqldf)
require(scales)
require(stringr)
require(MatchIt)
require(modelsummary)
options(modelsummary_format_numeric_latex = "plain")
require(readxl)
library(purrr)

############################# Data Preparation - Analyst Sample #############################
# Set Working Directory
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

#### Load IBES Summary History - Summary Statistics file (accessed via WRDS)
analysts <- read.csv("ibes_summary_eps.csv")
names(analysts) <- tolower(names(analysts))

### Initial Filters ###: Keep annual EPS forecasts in USD
# Annual Forecasts
analysts <- subset(analysts, fiscalp == "ANN")
# EPS Forecasts
analysts <- subset(analysts, measure == "EPS")
# Forecasts in USD
analysts <- subset(analysts, curcode == "USD")

# For subsequent merges: define the month before the analyst consensus date
analysts$fyear_end <- as.Date(analysts$fpedats)
analysts$analyst_date <- as.Date(analysts$statpers)  
analysts$previous_month <- analysts$analyst_date %m-% months(1)
analysts$month <- format(analysts$previous_month, "%Y%m")
analysts$previous_month <- NULL

### Import University of Michigan Consumer Sentiment data (based on monthly file)
sentiment <- read.csv("u_of_m_index.csv")

# Create month variable for subsequent merge
month_lookup <- c("January" = "01", "February" = "02", "March" = "03", "April" = "04",
                  "May" = "05", "June" = "06", "July" = "07", "August" = "08",
                  "September" = "09", "October" = "10", "November" = "11", "December" = "12")
sentiment$numeric_month <- month_lookup[sentiment$Month]
rm(month_lookup)
sentiment$month <- paste0(sentiment$YYYY, sentiment$numeric_month)
sentiment <- sentiment %>% select(month, ICS_ALL)
sentiment <- sentiment %>% rename(sentiment = ICS_ALL)

analysts <- merge(analysts, sentiment, by = "month")
rm(sentiment)

### Import pre-processed stock market data
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
         month = V2,
         date = V3,
         size = V4,
         stock_issues = V5,
         beta = V6,
         ivol = V7,
         sue = V8,
         sue_date = V9,
         short = V10,
         turnover = V11,
         bm = V12,
         op = V13,
         ep = V14,
         disp = V15,
         analysts = V16,
         vola = V17,
         ret1m = V18,
         ret2m = V19,
         ret3m = V20,
         ret1yr = V21,
         ret2yr = V22,
         ret3yr = V23,
         ret4yr = V24,
         ret5yr = V25,
         cgo = V26,
         percgain = V27,
         cgo_placebo = V28,
         cgo_daily = V29
  )


linking_table <- read.csv("ncsuip-permno-cusip-merge.csv")
names(linking_table) <- tolower(names(linking_table))
stocks <- merge(stocks, linking_table, by = c("permno","date"))
rm(linking_table)

# IBES CUSIP corresponds to CRSPs NCUSIP - adopt CRSP's terminology
analysts <- analysts %>% rename(ncusip = cusip)
analysts <- merge(analysts,stocks, by = c("ncusip","month")) 
rm(stocks)

### Import Merged CRSP/Compustat data
compustat <- read.csv("crsp_compustat.csv")
names(compustat) <- tolower(names(compustat))
compustat <- compustat %>% arrange(gvkey, datadate) %>%  
  mutate(price = prcc_f / adjex_f) %>%  
  group_by(gvkey) %>%
  mutate(price_prev = lag(price)) %>%
  ungroup()

compustat$prd_yr <- substr(compustat$datadate, 1, 4)
compustat$prd_mon <- substr(compustat$datadate, 6, 7)
compustat$prd_mon <- str_remove(compustat$prd_mon, "^0+")
compustat <- compustat %>% rename(permno = lpermno,
                                  cusip_9 = cusip)
compustat$cusip <- substr(compustat$cusip_9, 1,8)
compustat <- compustat %>% select(permno, gvkey, cusip, prd_yr, prd_mon, price_prev, sic)

analysts$prd_yr <- year(analysts$fpedats) 
analysts$prd_mon <- month(analysts$fpedats)

analysts <- merge(analysts, compustat, by = c("permno","prd_yr","prd_mon"))
rm(compustat)

### Apply final filters
# Limit to guidance within 365 days of firm-year end
analysts$horizon <- as.numeric(analysts$fyear_end - analysts$analyst_date)
analysts <- subset(analysts, horizon >= 1)
analysts <- subset(analysts, horizon < 365)

# Exclude utilities and regulated industries (SIC 4900 - 4949)
analysts <- subset(analysts, sic < 4900 | sic > 4949)
# Exclude financial services (SIC 6000 - 6999)
analysts <- subset(analysts, sic < 6000 | sic > 6999)

# Date restriction:
analysts <- subset(analysts, year(analyst_date) >= 2001 & year(analyst_date) <= 2023)

# Drop penny stocks
analysts <- subset(analysts, price_prev >= 10)

# Focus on observations with above-median analyst following
analysts <- analysts %>%
  filter(numest >= median(numest, na.rm = TRUE))

# Exclude "stale" forecasts consensus
analysts <- analysts %>%
  mutate(
    analyst_date = as.Date(analyst_date),   
    medest_lag   = dplyr::lag(medest),
    .by = c(permno, fyear_end)
  ) %>%
  arrange(permno, fyear_end, analyst_date)
analysts <- subset(analysts, medest != medest_lag)

### Final variable definitions
analysts$analyst_bias_med <- ((analysts$medest - analysts$actual) / analysts$price_prev) * 100

# Log of size and horizon 
analysts$size <- log(1000 * analysts$size)
analysts$horizon <- log(analysts$horizon)

# Loss Dummy
analysts$loss_d <- ifelse(analysts$ep < 0, 1,0)

# Define process risk based on SIC
analysts$process <- ifelse(analysts$sic >= 2833 & analysts$sic <= 2836 | analysts$sic >= 8731 & analysts$sic <= 8734 | analysts$sic >= 3570 & analysts$sic <= 3577 | analysts$sic >= 7371 & analysts$sic <= 7379 | analysts$sic >= 3600 & analysts$sic <= 3674 | analysts$sic >= 5200 & analysts$sic <= 5961, 1, 0)

# Industry dummies based on two-digit SIC code
analysts$sic_two <- factor(substr(analysts$sic, 1, 2))

# Year dummies
analysts$year <- factor(analysts$prd_yr)

# Firm dummies
analysts$permno <- factor(analysts$permno)

# Net Equity Issuer
analysts$stock_issues_d <- ifelse(analysts$stock_issues > 0, 1, 0)

# Analyst following based on number of observations in consensus
analysts$analysts <- analysts$numest

# Drop if missing main dependent or independent variable
analysts <- subset(analysts, !is.na(analyst_bias_med) & !is.na(cgo))

### Winsorization - winsorize all continuous variables at 1% level
winsor_vars <- c("analyst_bias_med","cgo","percgain","beta","size","bm","horizon","op","analysts","disp","vola","stock_issues","sentiment")
analysts <- analysts %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

### Define different sets of controls
controls_1 <- c("beta","size","bm","sic_two","year")
controls_2 <- c(controls_1, "horizon","loss_d","process")
controls_3 <- c(controls_2, "op","analysts","disp","vola","stock_issues_d","sentiment")
controls_4 <- c("beta","size","bm","horizon","loss_d","op","analysts","disp","vola","stock_issues_d","sentiment","permno","year")

controls <- list(
  controls_1 = controls_1,
  controls_2 = controls_2,
  controls_3 = controls_3,
  controls_4 = controls_4
)

############################# Data Analyses - Analyst Sample #############################
############################# Table 7 #############################
c1 <- lm(analyst_bias_med ~ cgo + sic_two + year, data = analysts)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = analysts, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = analysts, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = analysts, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = analysts, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A6 #############################
c1 <- lm(analyst_bias_med ~ percgain + sic_two + year, data = analysts)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = analysts, update(analyst_bias_med ~ percgain, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = analysts, update(analyst_bias_med ~ percgain, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = analysts, update(analyst_bias_med ~ percgain, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = analysts, update(analyst_bias_med ~ percgain, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A32 #############################
ibes_guidance <- read.csv("ibes_guidance.csv")
ibes_guidance <- ibes_guidance %>%
  select(ticker, prd_yr) %>%
  distinct(ticker, prd_yr, .keep_all = TRUE)

analysts_wo_guid <- analysts %>%
  anti_join(ibes_guidance, by = c("ticker", "prd_yr"))

c1 <- lm(analyst_bias_med ~ cgo + sic_two + year, data = analysts_wo_guid)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = analysts_wo_guid, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = analysts_wo_guid, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = analysts_wo_guid, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = analysts_wo_guid, update(analyst_bias_med ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)


